import missingno as msno
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import pickle
import pprint
from sklearn.ensemble import RandomForestRegressor
from pandas_profiling import ProfileReport
from dateutil import relativedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from statsmodels.regression.linear_model import OLS
def new_line():
    """Print a dashed separator line with a blank line before and after it."""
    separator = "\n-------------------------\n"
    print(separator)
def RMSE(predictions, actual=None):
    """Return the root-mean-squared error, rounded to the nearest integer.

    Parameters
    ----------
    predictions : array-like
        Predicted target values.
    actual : pandas.Series, optional
        True target values. When omitted, the module-level ``test_y`` is
        used, which keeps existing ``RMSE(predictions)`` calls working.
    """
    if actual is None:
        # Backward-compatible default: fall back to the global hold-out target.
        actual = test_y
    squared_errors = (actual - predictions) ** 2
    return round(np.sqrt(squared_errors.mean()))
def plot_numerical_columns(col_name):
    """Show four diagnostic plots for the numeric column ``col_name`` of ``df``.

    The plots are, in order: a histogram, the values against the row index,
    the sorted values against their rank, and a box plot. The first three
    carry reference lines for the mean (red) and the median (green).
    """
    series = df[col_name]
    mean_value = series.mean()
    median_value = series.median()
    legend_labels = ['Actual', 'Mean', 'Median']

    # Histogram: the column's values lie on the x-axis, so the mean/median
    # markers must be vertical lines (a horizontal line would be placed on
    # the frequency axis and mean nothing).
    series.plot(kind="hist", figsize=(13, 8), label='Actual')
    plt.title(col_name, size=18)
    plt.axvline(x=mean_value, color='red', label='Mean')
    plt.axvline(x=median_value, color='green', label='Median')
    # Labels are attached to the artists so the legend cannot mis-assign them.
    plt.legend()
    plt.show()

    # Line plot of the values against the row index
    series.plot(figsize=(13, 8))
    plt.title(col_name, size=18)
    plt.axhline(y=mean_value, color='red')
    plt.axhline(y=median_value, color='green')
    plt.legend(legend_labels)
    plt.show()

    # Line plot of the sorted values against their rank
    series.sort_values().reset_index(drop=True).plot(figsize=(13, 8))
    plt.title(col_name + " (SORTED)", size=18)
    plt.axhline(y=mean_value, color='red')
    plt.axhline(y=median_value, color='green')
    plt.legend(legend_labels)
    plt.show()

    # Box plot
    series.plot(kind="box", figsize=(13, 8))
    plt.title(col_name, size=18)
    plt.xlabel("")
    plt.show()
def plot_date_columns(col_name):
    """Show five plots for the datetime column ``col_name`` of ``df``.

    Two line plots (raw order and sorted order) are followed by three bar
    charts giving the percentage of rows per year, per month and per day of
    month.
    """
    dates = df[col_name]
    graph_title = col_name + " Graph"

    # Dates in their original row order
    dates.plot(figsize=(15, 7), grid=True)
    plt.xlabel("Index", size=14)
    plt.ylabel("Date", size=14)
    plt.title(graph_title, size=18)
    plt.show()

    # Dates sorted chronologically
    dates.sort_values().reset_index(drop=True).plot(figsize=(15, 7), grid=True)
    plt.xlabel("Index (sorted)", size=14)
    plt.ylabel("Year", size=14)
    plt.title(graph_title, size=18)
    plt.show()

    # Percentage of all rows falling into each year / month / day of month
    frequency_views = (
        (dates.dt.year, "Year", " year Frequency Graph"),
        (dates.dt.month, "Month", " month Frequency Graph"),
        (dates.dt.day, "Day", " Day Frequency Graph"),
    )
    for component, axis_label, title_suffix in frequency_views:
        share = component.value_counts().sort_index() / len(df) * 100
        share.plot(kind="bar", figsize=(15, 7), grid=True)
        plt.xlabel(axis_label, size=14)
        plt.ylabel("Ratio (1-100)", size=14)
        plt.title(col_name + title_suffix, size=18)
        plt.show()
def plot_catagorical_columns(cat_variable):
    """Draw a bar chart of each category's share (in percent) of all rows of ``df``."""
    label_style = {"size": 14, "color": 'r'}
    category_share = df[cat_variable].value_counts() / len(df) * 100
    category_share.plot.bar(figsize=(15, 6), grid=True)
    plt.title(cat_variable, size=18, color='r')
    plt.xlabel("Catagory", **label_style)
    plt.ylabel("Ratio (1-100)", **label_style)
    plt.show()
def data_shape():
    """Return a short human-readable description of the current shape of ``df``."""
    n_rows, n_columns = df.shape
    return f"The Data have:\n\t{n_rows} rows\n\t{n_columns} columns\n"
#===
# Alternative inputs tried during development (kept for reference):
# df = pd.read_csv("data.csv", date_parser=True)
# df = pd.read_csv("df_only_selected_columns_using_PCA.csv", date_parser=True)
# target_variable = "ACTUAL_WORTH"
# df = pd.concat([
# df.select_dtypes("number").iloc[:, :3],
# df.select_dtypes("O").iloc[:, :3],
# df.select_dtypes(exclude=["number", "O"]),
# df[[target_variable]]], 1)
# target_variable = "AREA_NAME_EN"

# `date_parser` expects a callable and only takes effect together with
# `parse_dates`; passing True did nothing useful, so it is dropped.
# Date-like columns are detected later on.
df = pd.read_csv("cleaned_data.csv")
target_variable = "SalePrice"
#===
# Rows without a target value cannot be used for training or evaluation.
n_missing_target = df[target_variable].isna().sum()
if n_missing_target:
    new_line()
    print(f"There are {n_missing_target} NAs in target values, we droped those rows")
    # Reset the index so later steps that rebuild frames positionally
    # stay aligned with the remaining rows.
    df = df[df[target_variable].notna()].reset_index(drop=True)
del n_missing_target
#---------------------------------------------------
# Optional profiling report (disabled):
# df.select_dtypes("O").columns[:5]
# D = df.select_dtypes(exclude="O")
# D2 = df.select_dtypes("O").iloc[:,:5]
# df = pd.concat([D, D2], 1)
# profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)
# profile.to_file("your_report.html")
#---------------------------------------------------
# Report the size of the data set ...
new_line()
shape_summary = data_shape()
print(shape_summary)
#===
# ... and how many columns there are of each dtype.
new_line()
dtype_distribution = df.dtypes.value_counts()
print(f"Columns types distribution:\n\n{dtype_distribution}")
#---------------------------------------- NA
def _missing_values_summary():
    """Return the current NA counts of ``df`` split by column kind, as text."""
    return (
        f'There are {df.isna().sum().sum()} Missing values:'
        f'\n\t{df.select_dtypes("O").isna().sum().sum()} in catagorical variables'
        f'\n\t{df.select_dtypes("number").isna().sum().sum()} in numerical columns'
        f'\n\t{df.select_dtypes(exclude=["O", "number"]).isna().sum().sum()} in others'
    )


# NA count per column, restricted to the columns that have at least one NA.
na_counts = df.isna().sum().where(lambda x: x > 0).dropna()
if na_counts.size:
    new_line()
    print(f"There are {len(na_counts)} (out of {df.shape[1]}, [{round(len(na_counts)/df.shape[1]*100)}%]) columns that contains 1 or more missing values")
    # Keep the information "this value was missing" as an explicit feature
    # before the gaps get filled in.
    for i in na_counts.index:
        df[i + "_NA_indicator"] = df[i].isna().map({True: "Missing", False: "Not missing"})
    new_line()
    print(f"{na_counts.size} NA_indicator variables added to the data\n")
    print("========= NA Graphs =========\n")
    msno.matrix(df)
    plt.title("NA Graph")
    plt.show()
    new_line()
    sns.heatmap(df.isnull(), cbar=False)
    plt.title("NA Graph")
    plt.show()
#===
# Convert the counts to percentages of rows and drop columns that are entirely empty.
na_ratio = na_counts.sort_values() / len(df) * 100
fully_missing = na_ratio[na_ratio == 100].index
if len(fully_missing):
    new_line()
    df.drop(columns=fully_missing, inplace=True)
    print(f"There are {len(fully_missing)} columns that are all Missing values, so we droped those.\nNow {data_shape()}\n\nDropped columns names:")
    for i in fully_missing:
        print("\t", i)
    na_ratio = na_ratio[na_ratio != 100]
#===
na_dtype_distribution = df[na_ratio.index].dtypes.value_counts()
if na_dtype_distribution.size:
    new_line()
    print(f"NA columns data type Distribution:\n\n{na_dtype_distribution}")
del na_dtype_distribution
#===
new_line()
if na_ratio.size:
    print(f"NaN Ratio (0-100)\n\n{na_ratio}")
else:
    print("Now There is no NaN value in our Data")
#===
# Fill numeric gaps with a k-nearest-neighbours imputer.
if df.select_dtypes("number").isna().sum().sum():
    new_line()
    print('(Before Missing values treatment)\n' + _missing_values_summary())
    from sklearn.impute import KNNImputer
    numeric_part = df.select_dtypes("number")
    df_not_a_number = df.select_dtypes(exclude="number")
    imputer = KNNImputer(n_neighbors=4, weights="uniform")
    # NOTE(review): the target column is numeric here and therefore also
    # takes part in the neighbour search - confirm that this is intended.
    imputed = imputer.fit_transform(numeric_part)
    # Re-attach the original row labels: the imputer returns a bare array,
    # and concatenating a fresh RangeIndex frame with the non-numeric part
    # would misalign rows whenever rows were dropped earlier.
    numeric_part = pd.DataFrame(imputed, columns=numeric_part.columns, index=df.index)
    df = pd.concat([numeric_part, df_not_a_number], axis=1)
    del df_not_a_number, numeric_part
    print('\n(After filling numeric missing values)\n' + _missing_values_summary())
#===
# --------------------------------------------------------- Unique values
# Columns with a single distinct value carry no information. The target is
# never dropped here: the modeling stage needs it to exist.
unique_counts = df.nunique()
only_one_unique_value = [
    col for col in df.columns
    if col != target_variable and unique_counts[col] == 1
]
if only_one_unique_value:
    new_line()
    df.drop(columns=only_one_unique_value, inplace=True)
    print(f"There are {len(only_one_unique_value)} variables That have only one unique value, so we drop those.\n\nNow {data_shape()}\n\nThose columns names in order:\n")
    for i in sorted(only_one_unique_value):
        print(i)
del only_one_unique_value, unique_counts
# #===
# Columns where every value is distinct (identifiers and the like) are
# dropped as well. A continuous target can legitimately consist of
# all-distinct values, so it is excluded from this check.
all_values_are_unique = [
    col for col in df.columns
    if col != target_variable and df[col].is_unique
]
if all_values_are_unique:
    new_line()
    df.drop(columns=all_values_are_unique, inplace=True)
    print(f"There are {len(all_values_are_unique)} column/s that have all unique values, so no value repeatation, we droped those columns.\n\nNow {data_shape()}\nThose column/s name/s are:\n")
    for i in all_values_are_unique:
        print("\t", i)
del all_values_are_unique
date_columns = []
def DTYPES():
    """Classify the columns of ``df`` as Object, Number or Date.

    Every object column is tentatively parsed with ``pd.to_datetime``; the
    columns that parse are converted in place in ``df`` and recorded in the
    module-level ``date_columns``. Diagnostics are printed when a column
    ends up in more than one group or in none.

    Returns
    -------
    pandas.DataFrame
        One row per classified column with the columns ``Column`` and
        ``dtype`` (``'Object'``, ``'Number'`` or ``'Date'``).
    """
    global date_columns
    sample = df.head()
    catagorical_columns = sample.select_dtypes("O").columns
    numerical_columns = sample.select_dtypes("number").columns
    date_columns = []
    for i in catagorical_columns:
        try:
            converted = pd.to_datetime(df[i])
        except (ValueError, TypeError, OverflowError):
            # Not parseable as dates: the column stays categorical.
            continue
        df[i] = converted
        date_columns.append(i)
    catagorical_columns = catagorical_columns.drop(date_columns)
    if date_columns:
        date_columns = pd.Index(date_columns)
    #===
    classified = catagorical_columns.append(numerical_columns).append(pd.Index(date_columns))
    dtypes = pd.DataFrame({
        "Column": classified,
        "dtype": ['Object'] * len(catagorical_columns)
                 + ['Number'] * len(numerical_columns)
                 + ['Date'] * len(date_columns),
    })
    if not classified.is_unique:
        new_line()
        print("Some column/s repated in > 1 dtypes\n")
        repeated = dtypes.Column[dtypes.Column.duplicated()].values
        print(dtypes[dtypes.Column.isin(list(repeated))].to_string())
    #===
    unclassified = df.columns.difference(classified)
    if unclassified.size:
        new_line()
        print("Some columns not included in any existing catagory, those:\n")
        for i in unclassified:
            print(f"\t<{i}, with dtype of <{df[i].dtype}>")
    #===
    return dtypes
#===
dtypes = DTYPES()
# ----------------------------------------------------------------------- Feature enginearing
# ======= Adding date columns
# TODO: add polynomial, sqrt, tree, log features
def add_new_date_cols(x, suffix):
    """Derive calendar features from the datetime Series ``x``.

    Each derived feature that has more than one distinct value is added to
    the module-level ``df`` under the name ``suffix + <feature suffix>``.

    Parameters
    ----------
    x : pandas.Series
        Datetime column taken from ``df``.
    suffix : str
        Text the new column names start with (callers pass the column name).

    Returns
    -------
    pandas.DataFrame
        ``df`` including the new features, without the source column ``x.name``.
    """
    def quoted(values):
        # Numbers rendered as quoted text so later steps treat them as
        # categories; missing values stay missing.
        as_text = values.apply(lambda v: np.nan if np.isnan(v) else str(v).replace(".0", ""))
        return '"' + as_text + '"'

    # `Series.dt.week` is deprecated and removed in recent pandas releases;
    # `dt.isocalendar().week` is its replacement. Fall back for old versions.
    if hasattr(x.dt, "isocalendar"):
        week = x.dt.isocalendar().week.astype("float64")
    else:
        week = x.dt.week

    d = {}
    d[suffix + '_week_normalized'] = week / 52
    d[suffix + '_week_str'] = quoted(week)
    d[suffix + '_year_after_min_year'] = x.dt.year - x.dt.year.min()
    d[suffix + '_year_str'] = quoted(x.dt.year)
    d[suffix + '_day_name'] = x.dt.day_name()
    d[suffix + '_day_after_min_date_str'] = '"' + (x - x.min()).apply(lambda delta: str(delta).split()[0]) + '"'
    d[suffix + '_day_normalized'] = x.dt.day / 31
    d[suffix + '_hour_normalized'] = x.dt.hour / 24
    d[suffix + '_hour_str'] = quoted(x.dt.hour)
    d[suffix + '_month_name'] = x.dt.month_name()
    d[suffix + '_month_normalized'] = x.dt.month / 12
    for k, v in d.items():
        # A feature with a single value carries no information.
        if v.nunique() > 1:
            df[k] = v
    return df.drop(columns=x.name)
# Replace every detected date column by its derived calendar features and
# report how many columns the data gained.
n_columns_before_date_features = df.shape[1]
for date_col in date_columns:
    df = add_new_date_cols(df[date_col], date_col)
n_date_features_added = df.shape[1] - n_columns_before_date_features
if n_date_features_added > 0:
    new_line()
    print(f"Added {n_date_features_added} date Features")
# ======= type casting of numerical variable (those who have < 4% unique values) to catagorical variables
# The target is left out: a "<target>_str" column would hand the answer to
# the models as a feature.
numeric_features = df.select_dtypes("number").drop(columns=target_variable, errors="ignore")
low_cardinality_columns = (
    (numeric_features.nunique() / len(df) * 100)
    .where(lambda x: x < 4)
    .dropna()
    .index
)
del numeric_features
if low_cardinality_columns.size:
    n_columns_before_str_features = df.shape[1]
    for col_num_to_str in low_cardinality_columns:
        # The quotes keep the values from being parsed back into numbers.
        df[col_num_to_str + "_str"] = '"' + df[col_num_to_str].astype(str) + '"'
    n_str_features_added = df.shape[1] - n_columns_before_str_features
    new_line()
    print(f"Added {n_str_features_added} String Features (Extracted from numerical variables)")
# =======
def cluping_rare_cases_in_one_catagory(x, min_count=10, min_rare_total=8):
    """Lump infrequent categories of column ``x`` of ``df`` into "Rare cases".

    Parameters
    ----------
    x : str
        Name of a column of the module-level ``df``.
    min_count : int, optional
        Categories occurring fewer than this many times are replaced by the
        label ``"Rare cases"``.
    min_rare_total : int, optional
        If the least frequent category after lumping has fewer than this
        many rows, the ``"Rare cases"`` rows are assigned the most frequent
        value instead.

    Returns
    -------
    pandas.Series or None
        The transformed column, or ``None`` when the result had a single
        distinct value; in that case the column is dropped from ``df``.
    """
    global df
    x = df[x]
    counts = x.value_counts()
    rare_values = counts[counts < min_count].index.to_list()
    x = x.replace(rare_values, "Rare cases") if rare_values else x.copy()
    lumped_counts = x.value_counts()
    # value_counts() is sorted by frequency, so the last entry is the
    # smallest category; every category other than "Rare cases" has at least
    # `min_count` rows. `.iloc` avoids label-based lookup of -1, and the
    # emptiness check covers an all-missing column.
    if not lumped_counts.empty and lumped_counts.iloc[-1] < min_rare_total:
        # Too few rare rows to form a category of their own: give them the
        # most common value instead.
        x[x == "Rare cases"] = x.mode()[0]
    if x.nunique() == 1:
        new_line()
        print(f"The column <{x.name}> have only one unique value, We droped it from the data.")
        df.drop(columns=x.name, inplace=True)
        return None
    return x
# Lump rare categories in every object column; the helper returns None when
# it dropped the column instead.
object_column_names = df.select_dtypes("O").columns
for var in object_column_names:
    lumped = cluping_rare_cases_in_one_catagory(var)
    if isinstance(lumped, pd.Series):
        df[var] = lumped
new_line()
# Number (and percentage) of rows per column that now carry the "Rare cases" label.
rare_case_counts = (
    (df == 'Rare cases')
    .sum()
    .sort_values()
    .where(lambda x: x > 0)
    .dropna()
)
rare_case_summary = pd.DataFrame({
    "Count": rare_case_counts,
    "Ratio": round(rare_case_counts / len(df) * 100, 4),
})
print(f"<Rare case> catagory:\n{rare_case_summary.to_string()}")
# ----------------------------------------------------------------------- END (Feature enginearing)
# Re-classify the columns now that features were added and removed.
dtypes = DTYPES()
# ---------------------------------------------------- Correlation plot
new_line()
cor_df = df.select_dtypes('number').corr().abs()
# Hide the upper triangle (and the diagonal): the matrix is symmetric.
upper_triangle = np.triu(np.ones_like(cor_df, dtype=bool))
fig, ax = plt.subplots(figsize=(17, 10))
heatmap_options = dict(
    mask=upper_triangle,
    cmap=sns.color_palette("viridis", as_cmap=True),
    vmax=.3,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
)
heatmap_axes = sns.heatmap(cor_df, **heatmap_options)
heatmap_axes.axes.set_title("abs (Correlation) plot", fontsize=25)
plt.show()
# ---------------------------------------------------------------------
#===
def _report_number_column(x, column_name):
    """Print correlation, skewness and summary statistics of a numeric column, then plot it."""
    local_cor = cor_df[column_name].drop(column_name).reset_index()
    local_cor = local_cor.reindex(local_cor[column_name].abs().sort_values().index)
    if local_cor[column_name].max() == 1:
        new_line()
        print(f"This column is perfactly correlated with column <{local_cor[local_cor[column_name] == 1]['index'].values[0]}, so remove one of them")
    new_line()
    xm = local_cor[-3:].rename(columns={'index' : 'Column name', column_name : 'Correlation'}).reset_index(drop=True)
    xm.index = xm['Column name']
    xm.drop(columns="Column name", inplace=True)
    if not xm.empty:
        # With a single numeric column there is nothing to compare against.
        xm.plot(kind='barh', grid=True, figsize=(10, 1.5))
        plt.title("Most 3 correlated features with this columns (sorted)", size=14)
        plt.xlabel("Correlation", size=12)
        plt.show()
    new_line()
    skewness = x.skew(skipna=True)
    if abs(skewness) < 0.5:
        print(f"The data is fairly symmetrical (skewness is: {skewness})")
    elif abs(skewness) < 1:
        print(f"The data are moderately skewed (skewness is: {skewness})")
    else:
        print(f"The data are highly skewed (skewness is: {skewness})\nNote: When skewness exceed |1| we called it highly skewed")
    values = [x.count(), x.isna().sum(), x.mean(), x.std(), x.min()]
    values += x.quantile([.25, .5, .75]).to_list()
    # "Outlies" counts values more than 3 standard deviations from the mean.
    values += [x.max(), x.nunique(), (((x - x.mean()) / x.std()).abs() > 3).sum(), (x < 0).sum(), (x == 0).sum()]
    stats = pd.DataFrame(values, index=['Count', 'NA', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max', 'Nunique', 'Outlies', 'Nagetive', 'Zeros'], columns=['Count'])
    stats['Ratio'] = stats.Count / x.count() * 100
    # A percentage of rows is meaningless for the location/scale statistics.
    stats.loc['Mean' : 'Max', 'Ratio'] = None
    new_line()
    print(stats.round(2).to_string())
    plot_numerical_columns(column_name)


def _report_object_column(x, column_name):
    """Print frequency statistics and case/whitespace warnings of a categorical column, then plot it."""
    frequencies = x.value_counts()
    values = (
        x.count(),
        x.nunique(),
        len(x),
        x.isna().sum(),
        (x == x.mode().values[0]).sum(),
        (x == frequencies.tail(1).index[0]).sum(),
        frequencies.where(lambda c: c == 1).dropna().size,
    )
    stats = pd.DataFrame(values, index=['Count', 'Nunique', 'Len', 'NA', 'Most frequent', 'Least frequent', 'Values occured only once'], columns=['Counts'])
    stats['Ratio'] = (stats.Counts / x.count() * 100).round(4)
    stats.loc[['Len'], 'Ratio'] = None
    new_line()
    print(stats.to_string())
    if x.str.lower().nunique() != x.nunique():
        new_line()
        print(f"Case issue\n\tin orignal variable There are {x.nunique()} unique values\n\tin lower verstion there are {x.str.lower().nunique()} unique values.\n")
    if x.str.strip().nunique() != x.nunique():
        new_line()
        print(f"Space issue\n\tin orignal variable There are {x.nunique()} unique values\n\tin striped verstion there are {x.str.strip().nunique()} unique values.")
    plot_catagorical_columns(column_name)


def _missing_between_min_and_max(component):
    """Return, in ascending order, the integers between min and max that do not occur in ``component``."""
    present = component.dropna().astype(int)
    return sorted(set(range(present.min(), present.max() + 1)) - set(present))


def _report_date_column(x, column_name):
    """Print span, frequency statistics and calendar gaps of a date column, then plot it."""
    new_line()
    rd = relativedelta.relativedelta(pd.to_datetime(x.max()), pd.to_datetime(x.min()))
    print(f"Diffrenece between first and last date:\n\tYears : {rd.years}\n\tMonths: {rd.months}\n\tDays : {rd.days}")
    frequencies = x.value_counts()
    values = (
        x.count(),
        x.nunique(),
        (x == x.mode().values[0]).sum(),
        (x == frequencies.tail(1).index[0]).sum(),
        frequencies.where(lambda c: c == 1).dropna().size,
    )
    stats = pd.DataFrame(values, index=["Count", 'Nunique', 'Most frequent values', 'Least frequent values', 'Values occured only once count'], columns=['Counts'])
    stats['Ratio'] = (stats.Counts / x.count() * 100).round(4)
    new_line()
    print(f"\n{stats.to_string()}")
    gap_checks = (
        (x.dt.year, "These Years (in order) are missing:\n"),
        (x.dt.month, "These Months (in order) are missing:\n"),
        (x.dt.day, "These Days (in order) are missing:\n"),
    )
    for component, message in gap_checks:
        # Sorted, so the output really is "in order" as the message says.
        missing = _missing_between_min_and_max(component)
        if missing:
            new_line()
            print(message)
            for i in missing:
                print("\t", i, end=", ")
    new_line()
    plot_date_columns(column_name)


# Per-column report: duplicate detection, removal of useless columns and a
# type-specific summary with plots.
for column_name, type_ in zip(dtypes["Column"], dtypes["dtype"]):
    x = df[column_name]
    print(f"\n\n\n========================================= {column_name} =========================================\n\n")
    n_unique = x.nunique()
    for col_ in df.columns:
        if col_ == column_name:
            continue
        if df[col_].nunique() == n_unique:
            # Two columns are re-encodings of each other when, among their
            # distinct value pairs, neither side repeats.
            unique_combination = df[[col_, column_name]].drop_duplicates()
            if unique_combination[col_].is_unique and unique_combination[column_name].is_unique:
                new_line()
                print(f"This Columns is duplicate of <{col_}> column")
    print(f"Column Type : {type_}")
    if x.isna().all():
        new_line()
        df.drop(columns=column_name, inplace=True)
        print("We dropped This column, because it is all Empty")
        continue
    # DTYPES() labels categorical columns "Object"; the previous check for
    # "O" could never match.
    if type_ in ["Object", "Date"] and x.is_unique:
        new_line()
        df.drop(columns=column_name, inplace=True)
        print(f"We dropped This column, because it's a {type_} columns, and it's all values are unique")
        continue
    if n_unique == 1:
        new_line()
        df.drop(columns=column_name, inplace=True)
        print("We dropped This column, because There is only one unique value")
        continue
    if type_ == "Number":
        _report_number_column(x, column_name)
    elif type_ == "Object":
        _report_object_column(x, column_name)
    elif type_ == "Date":
        # Previously compared the builtin `type`, so this branch never ran.
        _report_date_column(x, column_name)
# ================================================================================================================ Modeling
# The kind of model is chosen from the dtype of the target:
#   numeric target          -> regression (OLS + random forest)
#   object target, 2 levels -> binary classification (logistic regression)
#   object target, >2       -> multiclass classification (random forest)
# Everything stays at module level on purpose: RMSE() reads the global test_y.
print("\n\n")
print("----------------------------------------------------------------------------------------------")
print("****************************************** Modeling ******************************************")
# Regression problem
# is_numeric_dtype is used instead of `dtype in [float, int]`, whose result
# depends on the platform's default integer width.
target_is_numeric = (
    pd.api.types.is_numeric_dtype(df[target_variable])
    and not pd.api.types.is_bool_dtype(df[target_variable])
)
if target_is_numeric:
    print("\n-------------------- This is Regression problem --------------------\n")
    print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")
    # One-hot encode everything that is not numeric. dtype=float keeps the
    # design matrix purely numeric for statsmodels on every pandas version.
    cat_cols = pd.get_dummies(df.select_dtypes(exclude="number"), prefix_sep="__", dtype=float)
    df = pd.concat([df.select_dtypes("number"), cat_cols], axis=1)
    del cat_cols
    # ====
    train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
    # ====
    # --------------------------------------------------------- Linear regression
    print("\n ------------------------------------- Linear Regression -------------------------------------\n")
    model_reg = OLS(train_y, train_X).fit()
    summary = model_reg.summary()
    # Turn the coefficient table of the summary into a numeric DataFrame.
    summary_df = pd.DataFrame(summary.tables[1])
    summary_df.columns = summary_df.iloc[0]
    summary_df.drop(0, inplace=True)
    summary_df.columns = summary_df.columns.astype(str)
    summary_df.columns = ["Variable"] + summary_df.columns[1:].to_list()
    for i in summary_df.columns[1:]:
        summary_df[i] = summary_df[i].astype(str).astype(float)
    summary_df.Variable = summary_df.Variable.astype(str)
    # Significance stars in the style of R's summary().
    summary_df['Indicator'] = summary_df['P>|t|'].apply(lambda x:"***" if x < 0.001 else "**" if x < 0.01 else "*" if x < 0.05 else "." if x < 0.1 else "")
    summary_df = summary_df.sort_values("Variable").reset_index(drop=True)
    # Write the file the message below announces; to_csv() without a path
    # only returns a string and saved nothing.
    summary_df.to_csv("summary_OLS_1.csv", index=False)
    new_line()
    print("NOTE: This summary saved as <summary_OLS_1.csv>")
    new_line()
    print(summary_df.to_string())
    # ============================= Model statistic
    predictions = model_reg.predict(test_X)
    new_line()
    print(" --- Model statistic --- \n")
    print(f"R-squared : {round(model_reg.rsquared, 3)}")
    print(f"Adj. R-squared : {round(model_reg.rsquared_adj, 3)}")
    print(f"F-statistic : {round(model_reg.fvalue)}")
    print(f"Prob (F-statistic): {model_reg.f_pvalue}")
    print(f"No. Observations : {round(model_reg.nobs)}")
    print(f"AIC : {round(model_reg.aic)}")
    print(f"Df Residuals : {round(model_reg.df_resid)}")
    print(f"BIC : {round(model_reg.bic)}")
    print(f"RMSE (test) : {RMSE(predictions)}")
    # ======
    # Residuals should be uncorrelated with every feature; report the worst case.
    residual_frame = train_X.copy("deep")
    residual_frame['Errors__'] = model_reg.resid
    worst = residual_frame.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {worst.values[0]}, with columns <{worst.index[0]}>")
    print(f"Mean of train reseduals: {model_reg.resid.mean()}")
    del residual_frame, worst
    # ============================= END (Model statistic)
    # --------------------------------------------------------- END Linear regression
    # --------------------------------------------------------- Random Forest
    print("\n ------------------------------------- Random Forest -------------------------------------\n")
    rf = RandomForestRegressor(n_estimators = 200, oob_score=True)
    model_rf = rf.fit(train_X, train_y)
    predictions_rf = rf.predict(test_X)
    new_line()
    print("RF model peramters:\n")
    pprint.pprint(model_rf.get_params())
    new_line()
    # Plot at most the 30 most important features.
    featuresImportance = pd.Series(model_rf.feature_importances_, index=train_X.columns).sort_values(ascending=False).head(30)
    featuresImportance.plot(figsize=(20,10), kind='bar', grid=True)
    plt.title("RandomForest Feature importances Graph", size=18,color='red')
    plt.xlabel("Features", size=14, color='red')
    plt.ylabel("Importance", size=14, color='red')
    plt.show()
    del featuresImportance
    new_line()
    print("--- Model statistic ---")
    # The coefficient of determination R^2 of the prediction.
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
    print(f"R^2 (test) : {rf.score(test_X, test_y)}")
    print(f"R^2 (train): {rf.score(train_X, train_y)}")
    print(f"RMSE (test): {RMSE(predictions_rf)}")
    print(f"oob score : {model_rf.oob_score_}")
    residual_frame = test_X.copy("deep")
    errors_rf = predictions_rf - test_y
    residual_frame['Errors__'] = errors_rf
    worst = residual_frame.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {worst.values[0]}, with columns <{worst.index[0]}>")
    del residual_frame, worst
    # --------------------------------------------------------- END Random Forest
elif df[target_variable].dtype == "O":
    # Classififcation problem
    if df[target_variable].nunique() == 2:
        print("\n-------------------- This is Binary Classification problem --------------------\n")
        print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")
        df = pd.concat([
            df.select_dtypes(exclude = "O"),
            pd.get_dummies(df.drop(columns=target_variable).select_dtypes("O")),
            df[[target_variable]]
        ], axis=1)
        train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
        # Positional index so the truth lines up with the prediction Series below.
        test_y = test_y.reset_index(drop=True)
        clf = LogisticRegression().fit(train_X, train_y)
        negative_label, positive_label = clf.classes_
        # Probability of the positive class (second entry of clf.classes_).
        predictions = pd.Series(clf.predict_proba(test_X)[:, 1])
        lst = []
        for thresh in np.linspace(predictions.min(), predictions.max(), 50)[1:]:
            # Predict the positive class when its probability reaches the
            # threshold. The earlier code assigned class 0 where the
            # probability of class 0 was *below* the threshold, i.e. inverted.
            pred = pd.Series(np.where(predictions >= thresh, positive_label, negative_label))
            TN = ((pred == negative_label) & (test_y == negative_label)).sum()
            TP = ((pred == positive_label) & (test_y == positive_label)).sum()
            FN = ((pred == negative_label) & (test_y == positive_label)).sum()
            FP = ((pred == positive_label) & (test_y == negative_label)).sum()
            p = TP / (TP + FP)
            r = TP / (TP + FN)
            f1 = 2 * ((p * r) / (p + r))
            lst.append((thresh, (pred == test_y).mean(), p, r, f1))
        d = pd.DataFrame(lst, columns=["Thresold", "Accurecy(0-1)", "Precision", "Recall", "F1"])
        d = d.set_index("Thresold")
        d.plot(grid=True, figsize=(18,7))
        plt.title("Model performance at diffrent Thresolds", size=18, color='red')
        plt.xlabel("Thresold", size=14, color='red')
        plt.ylabel("")
        plt.show()
    else:
        print("\n-------------------- This is Multiclass Classification problem --------------------\n")
        print("'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")
        # Label-encode every object column. The encoded Series gets the
        # frame's own index; a fresh RangeIndex would misalign the rows
        # whenever rows were dropped earlier.
        for col in df.select_dtypes("O").columns:
            codes = pd.Series(LabelEncoder().fit_transform(df[col].astype(str)), index=df.index)
            # Features become integer codes; the target keeps string codes
            # so that it is treated as a class label.
            df[col] = codes.astype(str) if col == target_variable else codes
        train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
        clf=RandomForestClassifier(n_estimators=1000).fit(train_X, train_y)
        predictions = clf.predict(test_X)
        # Plot at most the 30 most important features.
        feature_imp = pd.Series(clf.feature_importances_,index=train_X.columns).sort_values(ascending=False).head(30)
        feature_imp.plot(kind='barh', figsize=(17,10), grid=True)
        plt.title("Feature importances Graph", size=18, color='red')
        plt.xlabel("Importance", size=14, color='red')
        plt.ylabel("Feature", size=14, color='red')
        plt.show()
        # ====
        print(f"accuracy_score: {metrics.accuracy_score(test_y, predictions)}")
        # The default average="binary" raises for more than two classes.
        print(f"f1_score: {metrics.f1_score(test_y, predictions, average='weighted')}")
        # plot_roc_curve / plot_precision_recall_curve only accept binary
        # classifiers, so the curves are drawn one-vs-rest per class.
        class_proba = clf.predict_proba(test_X)
        plt.figure(figsize=(10, 7))
        for k, cls in enumerate(clf.classes_):
            fpr, tpr, _ = metrics.roc_curve(test_y == cls, class_proba[:, k])
            plt.plot(fpr, tpr, label=str(cls))
        plt.plot([0, 1], [0, 1], linestyle="--", color="grey")
        plt.xlabel("False positive rate")
        plt.ylabel("True positive rate")
        plt.legend()
        plt.title("ROC curve plot")
        plt.show()
        cm = metrics.confusion_matrix(test_y, predictions, labels=clf.classes_)
        metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_).plot()
        plt.title("Confusion matrix")
        plt.show()
        plt.figure(figsize=(10, 7))
        for k, cls in enumerate(clf.classes_):
            precision, recall, _ = metrics.precision_recall_curve(test_y == cls, class_proba[:, k])
            plt.plot(recall, precision, label=str(cls))
        plt.xlabel("Recall")
        plt.ylabel("Precision")
        plt.legend()
        plt.title("Precision recall curve")
        plt.show()
# ================================================================================================================ END Modeling
-------------------------
There are 1459 NAs in target values, we droped those rows
-------------------------
The Data have:
1460 rows
73 columns
-------------------------
Columns types distribution:
object 39
int64 23
float64 11
dtype: int64
-------------------------
Now There is no NaN value in our Data
-------------------------
There are 1 variables That have only one unique value, so we drop those.
Now The Data have:
1460 rows
72 columns
Those columns names in order:
train_or_test
-------------------------
There are 1 column/s that have all unique values, so no value repeatation, we droped those columns.
Now The Data have:
1460 rows
71 columns
Those column/s name/s are:
Id
-------------------------
Added 15 String Features (Extracted from numerical variables)
-------------------------
<Rare case> catagory:
Count Ratio
HouseStyle 8.0 0.5479
FullBath_str 9.0 0.6164
Foundation 9.0 0.6164
RoofStyle 9.0 0.6164
Neighborhood 11.0 0.7534
BedroomAbvGr_str 14.0 0.9589
Condition1 15.0 1.0274
GarageType 15.0 1.0274
Exterior2nd 17.0 1.1644
GarageCond 18.0 1.2329
SaleType 28.0 1.9178
MiscVal_str 41.0 2.8082
GarageYrBlt 160.0 10.9589
-------------------------
========================================= Alley =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 3 0.2055
Len 1460 NaN
NA 0 0.0000
Most frequent 1369 93.7671
Least frequent 41 2.8082
Values occured only once 0 0.0000
========================================= BldgType =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 1220 83.5616
Least frequent 31 2.1233
Values occured only once 0 0.0000
========================================= BsmtCond =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 1313 89.9315
Least frequent 37 2.5342
Values occured only once 0 0.0000
========================================= BsmtExposure =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 953 65.2740
Least frequent 38 2.6027
Values occured only once 0 0.0000
========================================= BsmtFinType1 =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 7 0.4795
Len 1460 NaN
NA 0 0.0000
Most frequent 430 29.4521
Least frequent 37 2.5342
Values occured only once 0 0.0000
========================================= BsmtFinType2 =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 7 0.4795
Len 1460 NaN
NA 0 0.0000
Most frequent 1256 86.0274
Least frequent 14 0.9589
Values occured only once 0 0.0000
========================================= BsmtQual =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 649 44.4521
Least frequent 35 2.3973
Values occured only once 0 0.0000
========================================= CentralAir =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 2 0.1370
Len 1460 NaN
NA 0 0.0000
Most frequent 1365 93.4932
Least frequent 95 6.5068
Values occured only once 0 0.0000
========================================= Condition1 =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 7 0.4795
Len 1460 NaN
NA 0 0.0000
Most frequent 1260 86.3014
Least frequent 11 0.7534
Values occured only once 0 0.0000
========================================= Electrical =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 3 0.2055
Len 1460 NaN
NA 0 0.0000
Most frequent 1339 91.7123
Least frequent 27 1.8493
Values occured only once 0 0.0000
========================================= ExterCond =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 3 0.2055
Len 1460 NaN
NA 0 0.0000
Most frequent 1286 88.0822
Least frequent 28 1.9178
Values occured only once 0 0.0000
========================================= ExterQual =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 906 62.0548
Least frequent 14 0.9589
Values occured only once 0 0.0000
========================================= Exterior1st =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 10 0.6849
Len 1460 NaN
NA 0 0.0000
Most frequent 522 35.7534
Least frequent 20 1.3699
Values occured only once 0 0.0000
========================================= Exterior2nd =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 12 0.8219
Len 1460 NaN
NA 0 0.0000
Most frequent 504 34.5205
Least frequent 10 0.6849
Values occured only once 0 0.0000
========================================= Fence =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 1179 80.7534
Least frequent 11 0.7534
Values occured only once 0 0.0000
========================================= FireplaceQu =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 6 0.4110
Len 1460 NaN
NA 0 0.0000
Most frequent 690 47.2603
Least frequent 20 1.3699
Values occured only once 0 0.0000
========================================= Foundation =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 647 44.3151
Least frequent 9 0.6164
Values occured only once 0 0.0000
========================================= Functional =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 1366 93.5616
Least frequent 14 0.9589
Values occured only once 0 0.0000
========================================= GarageCond =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 1326 90.8219
Least frequent 18 1.2329
Values occured only once 0 0.0000
========================================= GarageFinish =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 605 41.4384
Least frequent 81 5.5479
Values occured only once 0 0.0000
========================================= GarageQual =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 1317 90.2055
Least frequent 14 0.9589
Values occured only once 0 0.0000
========================================= GarageType =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 6 0.4110
Len 1460 NaN
NA 0 0.0000
Most frequent 870 59.5890
Least frequent 15 1.0274
Values occured only once 0 0.0000
========================================= GarageYrBlt =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 59 4.0411
Len 1460 NaN
NA 0 0.0000
Most frequent 160 10.9589
Least frequent 10 0.6849
Values occured only once 0 0.0000
========================================= HeatingQC =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 742 50.8219
Least frequent 49 3.3562
Values occured only once 0 0.0000
========================================= HouseStyle =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 8 0.5479
Len 1460 NaN
NA 0 0.0000
Most frequent 726 49.7260
Least frequent 8 0.5479
Values occured only once 0 0.0000
========================================= KitchenQual =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 735 50.3425
Least frequent 39 2.6712
Values occured only once 0 0.0000
========================================= LandContour =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 1311 89.7945
Least frequent 36 2.4658
Values occured only once 0 0.0000
========================================= LandSlope =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 3 0.2055
Len 1460 NaN
NA 0 0.0000
Most frequent 1382 94.6575
Least frequent 13 0.8904
Values occured only once 0 0.0000
========================================= LotConfig =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 1056 72.3288
Least frequent 47 3.2192
Values occured only once 0 0.0000
========================================= LotShape =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 925 63.3562
Least frequent 10 0.6849
Values occured only once 0 0.0000
========================================= MSZoning =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 1151 78.8356
Least frequent 10 0.6849
Values occured only once 0 0.0000
========================================= MasVnrType =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 872 59.7260
Least frequent 15 1.0274
Values occured only once 0 0.0000
========================================= MiscFeature =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 2 0.1370
Len 1460 NaN
NA 0 0.0000
Most frequent 1411 96.6438
Least frequent 49 3.3562
Values occured only once 0 0.0000
========================================= Neighborhood =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 24 1.6438
Len 1460 NaN
NA 0 0.0000
Most frequent 225 15.4110
Least frequent 11 0.7534
Values occured only once 0 0.0000
========================================= PavedDrive =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 3 0.2055
Len 1460 NaN
NA 0 0.0000
Most frequent 1340 91.7808
Least frequent 30 2.0548
Values occured only once 0 0.0000
========================================= RoofStyle =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 1141 78.1507
Least frequent 9 0.6164
Values occured only once 0 0.0000
========================================= SaleCondition =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 1202 82.3288
Least frequent 12 0.8219
Values occured only once 0 0.0000
========================================= SaleType =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 1267 86.7808
Least frequent 28 1.9178
Values occured only once 0 0.0000
========================================= BedroomAbvGr_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 6 0.4110
Len 1460 NaN
NA 0 0.0000
Most frequent 804 55.0685
Least frequent 14 0.9589
Values occured only once 0 0.0000
========================================= BsmtFullBath_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 3 0.2055
Len 1460 NaN
NA 0 0.0000
Most frequent 857 58.6986
Least frequent 15 1.0274
Values occured only once 0 0.0000
========================================= BsmtHalfBath_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 2 0.1370
Len 1460 NaN
NA 0 0.0000
Most frequent 1380 94.5205
Least frequent 80 5.4795
Values occured only once 0 0.0000
========================================= Fireplaces_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 3 0.2055
Len 1460 NaN
NA 0 0.0000
Most frequent 695 47.6027
Least frequent 115 7.8767
Values occured only once 0 0.0000
========================================= FullBath_str =========================================
-------------------------
This Columns is duplicate of <FullBath> column
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 768 52.6027
Least frequent 9 0.6164
Values occured only once 0 0.0000
========================================= GarageCars_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 4 0.2740
Len 1460 NaN
NA 0 0.0000
Most frequent 829 56.7808
Least frequent 81 5.5479
Values occured only once 0 0.0000
========================================= HalfBath_str =========================================
-------------------------
This Columns is duplicate of <HalfBath> column
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 3 0.2055
Len 1460 NaN
NA 0 0.0000
Most frequent 913 62.5342
Least frequent 12 0.8219
Values occured only once 0 0.0000
========================================= KitchenAbvGr_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 2 0.1370
Len 1460 NaN
NA 0 0.0000
Most frequent 1395 95.5479
Least frequent 65 4.4521
Values occured only once 0 0.0000
========================================= MSSubClass_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 14 0.9589
Len 1460 NaN
NA 0 0.0000
Most frequent 540 36.9863
Least frequent 10 0.6849
Values occured only once 0 0.0000
========================================= MiscVal_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 3 0.2055
Len 1460 NaN
NA 0 0.0000
Most frequent 1408 96.4384
Least frequent 11 0.7534
Values occured only once 0 0.0000
========================================= MoSold_str =========================================
-------------------------
This Columns is duplicate of <MoSold> column
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 12 0.8219
Len 1460 NaN
NA 0 0.0000
Most frequent 253 17.3288
Least frequent 52 3.5616
Values occured only once 0 0.0000
========================================= OverallCond_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 7 0.4795
Len 1460 NaN
NA 0 0.0000
Most frequent 827 56.6438
Least frequent 22 1.5068
Values occured only once 0 0.0000
========================================= OverallQual_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 8 0.5479
Len 1460 NaN
NA 0 0.0000
Most frequent 402 27.5342
Least frequent 18 1.2329
Values occured only once 0 0.0000
========================================= TotRmsAbvGrd_str =========================================
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 10 0.6849
Len 1460 NaN
NA 0 0.0000
Most frequent 404 27.6712
Least frequent 11 0.7534
Values occured only once 0 0.0000
========================================= YrSold_str =========================================
-------------------------
This Columns is duplicate of <YrSold> column
Column Type : Object
-------------------------
Counts Ratio
Count 1460 100.0000
Nunique 5 0.3425
Len 1460 NaN
NA 0 0.0000
Most frequent 338 23.1507
Least frequent 175 11.9863
Values occured only once 0 0.0000
========================================= 1stFlrSF ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 1.3767566220336365)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 1162.63 NaN
Std 386.59 NaN
Min 334.00 NaN
25% 882.00 NaN
50% 1087.00 NaN
75% 1391.25 NaN
Max 4692.00 NaN
Nunique 753.00 51.58
Outlies 12.00 0.82
Nagetive 0.00 0.00
Zeros 0.00 0.00
========================================= 2ndFlrSF ========================================= Column Type : Number -------------------------
-------------------------
The data are moderately skewed (skewness is: 0.8130298163023265)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 346.99 NaN
Std 436.53 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 728.00 NaN
Max 2065.00 NaN
Nunique 417.00 28.56
Outlies 4.00 0.27
Nagetive 0.00 0.00
Zeros 829.00 56.78
========================================= BedroomAbvGr ========================================= Column Type : Number -------------------------
-------------------------
The data is fairly symmetrical (skewness is: 0.21179009627507137)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 2.87 NaN
Std 0.82 NaN
Min 0.00 NaN
25% 2.00 NaN
50% 3.00 NaN
75% 3.00 NaN
Max 8.00 NaN
Nunique 8.00 0.55
Outlies 14.00 0.96
Nagetive 0.00 0.00
Zeros 6.00 0.41
========================================= BsmtFinSF1 ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 1.685503071910789)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 443.64 NaN
Std 456.10 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 383.50 NaN
75% 712.25 NaN
Max 5644.00 NaN
Nunique 637.00 43.63
Outlies 6.00 0.41
Nagetive 0.00 0.00
Zeros 467.00 31.99
========================================= BsmtFinSF2 ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 4.255261108933303)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 46.55 NaN
Std 161.32 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 0.00 NaN
Max 1474.00 NaN
Nunique 144.00 9.86
Outlies 50.00 3.42
Nagetive 0.00 0.00
Zeros 1293.00 88.56
========================================= BsmtFullBath ========================================= Column Type : Number -------------------------
-------------------------
The data are moderately skewed (skewness is: 0.596066609663168)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 0.43 NaN
Std 0.52 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 1.00 NaN
Max 3.00 NaN
Nunique 4.00 0.27
Outlies 16.00 1.10
Nagetive 0.00 0.00
Zeros 856.00 58.63
========================================= BsmtHalfBath ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 4.103402697955168)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 0.06 NaN
Std 0.24 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 0.00 NaN
Max 2.00 NaN
Nunique 3.00 0.21
Outlies 82.00 5.62
Nagetive 0.00 0.00
Zeros 1378.00 94.38
========================================= BsmtUnfSF ========================================= Column Type : Number -------------------------
-------------------------
The data are moderately skewed (skewness is: 0.9202684528039037)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 567.24 NaN
Std 441.87 NaN
Min 0.00 NaN
25% 223.00 NaN
50% 477.50 NaN
75% 808.00 NaN
Max 2336.00 NaN
Nunique 780.00 53.42
Outlies 11.00 0.75
Nagetive 0.00 0.00
Zeros 118.00 8.08
========================================= EnclosedPorch ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 3.08987190371177)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 21.95 NaN
Std 61.12 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 0.00 NaN
Max 552.00 NaN
Nunique 120.00 8.22
Outlies 51.00 3.49
Nagetive 0.00 0.00
Zeros 1252.00 85.75
========================================= Fireplaces ========================================= Column Type : Number -------------------------
-------------------------
The data are moderately skewed (skewness is: 0.6495651830548841)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 0.61 NaN
Std 0.64 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 1.00 NaN
75% 1.00 NaN
Max 3.00 NaN
Nunique 4.00 0.27
Outlies 5.00 0.34
Nagetive 0.00 0.00
Zeros 690.00 47.26
========================================= FullBath ========================================= ------------------------- This Columns is duplicate of <FullBath_str> column Column Type : Number -------------------------
-------------------------
The data is fairly symmetrical (skewness is: 0.036561558402727165)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 1.57 NaN
Std 0.55 NaN
Min 0.00 NaN
25% 1.00 NaN
50% 2.00 NaN
75% 2.00 NaN
Max 3.00 NaN
Nunique 4.00 0.27
Outlies 0.00 0.00
Nagetive 0.00 0.00
Zeros 9.00 0.62
========================================= GarageArea ========================================= Column Type : Number -------------------------
-------------------------
The data is fairly symmetrical (skewness is: 0.17998090674623907)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 472.98 NaN
Std 213.80 NaN
Min 0.00 NaN
25% 334.50 NaN
50% 480.00 NaN
75% 576.00 NaN
Max 1418.00 NaN
Nunique 441.00 30.21
Outlies 7.00 0.48
Nagetive 0.00 0.00
Zeros 81.00 5.55
========================================= GarageCars ========================================= Column Type : Number -------------------------
-------------------------
The data is fairly symmetrical (skewness is: -0.3425489297486655)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 1.77 NaN
Std 0.75 NaN
Min 0.00 NaN
25% 1.00 NaN
50% 2.00 NaN
75% 2.00 NaN
Max 4.00 NaN
Nunique 5.00 0.34
Outlies 0.00 0.00
Nagetive 0.00 0.00
Zeros 81.00 5.55
========================================= GrLivArea ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 1.3665603560164552)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 1515.46 NaN
Std 525.48 NaN
Min 334.00 NaN
25% 1129.50 NaN
50% 1464.00 NaN
75% 1776.75 NaN
Max 5642.00 NaN
Nunique 861.00 58.97
Outlies 16.00 1.10
Nagetive 0.00 0.00
Zeros 0.00 0.00
========================================= HalfBath ========================================= ------------------------- This Columns is duplicate of <HalfBath_str> column Column Type : Number -------------------------
-------------------------
The data are moderately skewed (skewness is: 0.675897448233722)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 0.38 NaN
Std 0.50 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 1.00 NaN
Max 2.00 NaN
Nunique 3.00 0.21
Outlies 12.00 0.82
Nagetive 0.00 0.00
Zeros 913.00 62.53
========================================= KitchenAbvGr ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 4.488396777072859)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 1.05 NaN
Std 0.22 NaN
Min 0.00 NaN
25% 1.00 NaN
50% 1.00 NaN
75% 1.00 NaN
Max 3.00 NaN
Nunique 4.00 0.27
Outlies 68.00 4.66
Nagetive 0.00 0.00
Zeros 1.00 0.07
========================================= LotArea ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 12.207687851233496)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 10516.83 NaN
Std 9981.26 NaN
Min 1300.00 NaN
25% 7553.50 NaN
50% 9478.50 NaN
75% 11601.50 NaN
Max 215245.00 NaN
Nunique 1073.00 73.49
Outlies 13.00 0.89
Nagetive 0.00 0.00
Zeros 0.00 0.00
========================================= LotFrontage ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 2.402352471321692)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.0
NA 0.00 0.0
Mean 69.92 NaN
Std 22.03 NaN
Min 21.00 NaN
25% 60.00 NaN
50% 69.31 NaN
75% 79.00 NaN
Max 313.00 NaN
Nunique 111.00 7.6
Outlies 16.00 1.1
Nagetive 0.00 0.0
Zeros 0.00 0.0
========================================= MSSubClass ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 1.4076567471495591)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.0 100.00
NA 0.0 0.00
Mean 56.9 NaN
Std 42.3 NaN
Min 20.0 NaN
25% 20.0 NaN
50% 50.0 NaN
75% 70.0 NaN
Max 190.0 NaN
Nunique 15.0 1.03
Outlies 30.0 2.05
Nagetive 0.0 0.00
Zeros 0.0 0.00
========================================= MasVnrArea ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 2.676545581771927)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 103.68 NaN
Std 180.57 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 164.25 NaN
Max 1600.00 NaN
Nunique 328.00 22.47
Outlies 32.00 2.19
Nagetive 0.00 0.00
Zeros 861.00 58.97
========================================= MiscVal ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 24.476794188821916)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 43.49 NaN
Std 496.12 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 0.00 NaN
Max 15500.00 NaN
Nunique 21.00 1.44
Outlies 8.00 0.55
Nagetive 0.00 0.00
Zeros 1408.00 96.44
========================================= MoSold ========================================= ------------------------- This Columns is duplicate of <MoSold_str> column Column Type : Number -------------------------
-------------------------
The data is fairly symmetrical (skewness is: 0.21205298505146022)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 6.32 NaN
Std 2.70 NaN
Min 1.00 NaN
25% 5.00 NaN
50% 6.00 NaN
75% 8.00 NaN
Max 12.00 NaN
Nunique 12.00 0.82
Outlies 0.00 0.00
Nagetive 0.00 0.00
Zeros 0.00 0.00
========================================= OpenPorchSF ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 2.3643417403694404)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 46.66 NaN
Std 66.26 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 25.00 NaN
75% 68.00 NaN
Max 547.00 NaN
Nunique 202.00 13.84
Outlies 27.00 1.85
Nagetive 0.00 0.00
Zeros 656.00 44.93
========================================= OverallCond ========================================= Column Type : Number -------------------------
-------------------------
The data are moderately skewed (skewness is: 0.6930674724842182)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 5.58 NaN
Std 1.11 NaN
Min 1.00 NaN
25% 5.00 NaN
50% 5.00 NaN
75% 6.00 NaN
Max 9.00 NaN
Nunique 9.00 0.62
Outlies 28.00 1.92
Nagetive 0.00 0.00
Zeros 0.00 0.00
========================================= OverallQual ========================================= Column Type : Number -------------------------
-------------------------
The data is fairly symmetrical (skewness is: 0.2169439277628693)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 6.10 NaN
Std 1.38 NaN
Min 1.00 NaN
25% 5.00 NaN
50% 6.00 NaN
75% 7.00 NaN
Max 10.00 NaN
Nunique 10.00 0.68
Outlies 2.00 0.14
Nagetive 0.00 0.00
Zeros 0.00 0.00
========================================= SalePrice ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 1.8828757597682129)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.0 100.00
NA 0.0 0.00
Mean 180921.2 NaN
Std 79442.5 NaN
Min 34900.0 NaN
25% 129975.0 NaN
50% 163000.0 NaN
75% 214000.0 NaN
Max 755000.0 NaN
Nunique 663.0 45.41
Outlies 22.0 1.51
Nagetive 0.0 0.00
Zeros 0.0 0.00
========================================= ScreenPorch ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 4.122213743143115)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 15.06 NaN
Std 55.76 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 0.00 NaN
Max 480.00 NaN
Nunique 76.00 5.21
Outlies 55.00 3.77
Nagetive 0.00 0.00
Zeros 1344.00 92.05
========================================= TotRmsAbvGrd ========================================= Column Type : Number -------------------------
-------------------------
The data are moderately skewed (skewness is: 0.6763408364355531)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 6.52 NaN
Std 1.63 NaN
Min 2.00 NaN
25% 5.00 NaN
50% 6.00 NaN
75% 7.00 NaN
Max 14.00 NaN
Nunique 12.00 0.82
Outlies 12.00 0.82
Nagetive 0.00 0.00
Zeros 0.00 0.00
========================================= TotalBsmtSF ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 1.5242545490627664)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 1057.43 NaN
Std 438.71 NaN
Min 0.00 NaN
25% 795.75 NaN
50% 991.50 NaN
75% 1298.25 NaN
Max 6110.00 NaN
Nunique 721.00 49.38
Outlies 10.00 0.68
Nagetive 0.00 0.00
Zeros 37.00 2.53
========================================= WoodDeckSF ========================================= Column Type : Number -------------------------
-------------------------
The data are highly skewed (skewness is: 1.5413757571931312)
Note: When skewness exceed |1| we called it highly skewed
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 94.24 NaN
Std 125.34 NaN
Min 0.00 NaN
25% 0.00 NaN
50% 0.00 NaN
75% 168.00 NaN
Max 857.00 NaN
Nunique 274.00 18.77
Outlies 22.00 1.51
Nagetive 0.00 0.00
Zeros 761.00 52.12
========================================= YearBuilt ========================================= Column Type : Number -------------------------
-------------------------
The data are moderately skewed (skewness is: -0.613461172488183)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 1971.27 NaN
Std 30.20 NaN
Min 1872.00 NaN
25% 1954.00 NaN
50% 1973.00 NaN
75% 2000.00 NaN
Max 2010.00 NaN
Nunique 112.00 7.67
Outlies 6.00 0.41
Nagetive 0.00 0.00
Zeros 0.00 0.00
========================================= YearRemodAdd ========================================= Column Type : Number -------------------------
-------------------------
The data are moderately skewed (skewness is: -0.5035620027004709)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 1984.87 NaN
Std 20.65 NaN
Min 1950.00 NaN
25% 1967.00 NaN
50% 1994.00 NaN
75% 2004.00 NaN
Max 2010.00 NaN
Nunique 61.00 4.18
Outlies 0.00 0.00
Nagetive 0.00 0.00
Zeros 0.00 0.00
========================================= YrSold ========================================= ------------------------- This Columns is duplicate of <YrSold_str> column Column Type : Number -------------------------
-------------------------
The data is fairly symmetrical (skewness is: 0.09626851386568028)
-------------------------
Count Ratio
Count 1460.00 100.00
NA 0.00 0.00
Mean 2007.82 NaN
Std 1.33 NaN
Min 2006.00 NaN
25% 2007.00 NaN
50% 2008.00 NaN
75% 2009.00 NaN
Max 2010.00 NaN
Nunique 5.00 0.34
Outlies 0.00 0.00
Nagetive 0.00 0.00
Zeros 0.00 0.00
----------------------------------------------------------------------------------------------
****************************************** Modeling ******************************************
-------------------- This is Regression problem --------------------
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
------------------------------------- Linear Regression -------------------------------------
-------------------------
NOTE: This summary saved as <summary_OLS_1.csv>
-------------------------
Variable coef std err t P>|t| [0.025 0.975] Indicator
0 1stFlrSF -18.9291 31.979 -0.592 0.554 -81.703 43.845
1 2ndFlrSF -0.7634 30.868 -0.025 0.980 -61.357 59.830
2 Alley__Grvl 2605.5787 4932.448 0.528 0.597 -7076.714 12300.000
3 Alley__No alley access 137.6717 3489.529 0.039 0.969 -6712.200 6987.544
4 Alley__Pave -2743.1919 5317.879 -0.516 0.606 -13200.000 7695.693
5 BedroomAbvGr -3597.4928 4643.740 -0.775 0.439 -12700.000 5518.071
6 BedroomAbvGr_str__"1" -747.0720 11300.000 -0.066 0.947 -22900.000 21400.000
7 BedroomAbvGr_str__"2" 729.8443 6427.864 0.114 0.910 -11900.000 13300.000
8 BedroomAbvGr_str__"3" -450.6655 3615.612 -0.125 0.901 -7548.037 6646.706
9 BedroomAbvGr_str__"4" 7809.2207 5142.021 1.519 0.129 -2284.459 17900.000
10 BedroomAbvGr_str__"5" -9867.5121 11300.000 -0.875 0.382 -32000.000 12300.000
11 BedroomAbvGr_str__Rare cases 2526.2431 11100.000 0.228 0.820 -19200.000 24300.000
12 BldgType__1Fam 2640.0462 16800.000 0.157 0.875 -30400.000 35700.000
13 BldgType__2fmCon 9404.5974 40500.000 0.232 0.816 -70000.000 88900.000
14 BldgType__Duplex -9000.5928 7315.770 -1.230 0.219 -23400.000 5360.111
15 BldgType__Twnhs -1765.9982 16200.000 -0.109 0.913 -33600.000 30100.000
16 BldgType__TwnhsE -1277.9941 15400.000 -0.083 0.934 -31400.000 28900.000
17 BsmtCond__Fa -2291.7883 5112.848 -0.448 0.654 -12300.000 7744.626
18 BsmtCond__Gd 1815.1592 4647.127 0.391 0.696 -7307.053 10900.000
19 BsmtCond__No Basement -1146.3490 6853.290 -0.167 0.867 -14600.000 12300.000
20 BsmtCond__TA 1623.0366 3442.676 0.471 0.637 -5134.864 8380.937
21 BsmtExposure__Av -955.3657 6265.186 -0.152 0.879 -13300.000 11300.000
22 BsmtExposure__Gd 19180.0000 6699.586 2.863 0.004 6030.572 32300.000 **
23 BsmtExposure__Mn -2522.3406 6537.230 -0.386 0.700 -15400.000 10300.000
24 BsmtExposure__No -7207.7651 6050.742 -1.191 0.234 -19100.000 4669.714
25 BsmtExposure__No Basement -8496.1886 23300.000 -0.365 0.715 -54200.000 37200.000
26 BsmtFinSF1 -2.1508 4.058 -0.530 0.596 -10.116 5.814
27 BsmtFinSF2 9.5969 8.770 1.094 0.274 -7.619 26.813
28 BsmtFinType1__ALQ 4102.1275 2929.984 1.400 0.162 -1649.371 9853.626
29 BsmtFinType1__BLQ 331.6853 3209.358 0.103 0.918 -5968.217 6631.588
30 BsmtFinType1__GLQ 3969.4932 3101.691 1.280 0.201 -2119.062 10100.000
31 BsmtFinType1__LwQ -2964.4002 4409.169 -0.672 0.502 -11600.000 5690.705
32 BsmtFinType1__No Basement -1146.3490 6853.290 -0.167 0.867 -14600.000 12300.000
33 BsmtFinType1__Rec 1337.3169 3440.467 0.389 0.698 -5416.249 8090.882
34 BsmtFinType1__Unf -5629.8152 3118.153 -1.805 0.071 -11800.000 491.054 .
35 BsmtFinType2__ALQ -9194.9585 8364.803 -1.099 0.272 -25600.000 7224.974
36 BsmtFinType2__BLQ 2206.1554 6212.058 0.355 0.723 -9987.985 14400.000
37 BsmtFinType2__GLQ -3490.8332 9335.643 -0.374 0.709 -21800.000 14800.000
38 BsmtFinType2__LwQ 1429.4090 5496.851 0.260 0.795 -9360.794 12200.000
39 BsmtFinType2__No Basement -1146.3490 6853.290 -0.167 0.867 -14600.000 12300.000
40 BsmtFinType2__Rec 1424.8590 5220.650 0.273 0.785 -8823.168 11700.000
41 BsmtFinType2__Unf 8771.7758 5249.636 1.671 0.095 -1533.149 19100.000 .
42 BsmtFullBath 3373.6210 11900.000 0.282 0.778 -20100.000 26800.000
43 BsmtFullBath_str__"0.0" 9008.7075 12600.000 0.712 0.477 -15800.000 33800.000
44 BsmtFullBath_str__"1.0" 12040.0000 4748.957 2.535 0.011 2715.731 21400.000 *
45 BsmtFullBath_str__"2.0" -21050.0000 14800.000 -1.418 0.157 -50200.000 8089.782
46 BsmtHalfBath 966.2138 1596.820 0.605 0.545 -2168.311 4100.739
47 BsmtHalfBath_str__"0.0" -966.1553 1597.006 -0.605 0.545 -4101.044 2168.733
48 BsmtHalfBath_str__"1.0" 966.2138 1596.820 0.605 0.545 -2168.311 4100.739
49 BsmtQual__Ex 10400.0000 4854.970 2.142 0.032 870.463 19900.000 *
50 BsmtQual__Fa -5840.2387 6853.000 -0.852 0.394 -19300.000 7612.056
51 BsmtQual__Gd -847.1803 3500.227 -0.242 0.809 -7718.053 6023.692
52 BsmtQual__No Basement -1146.3490 6853.290 -0.167 0.867 -14600.000 12300.000
53 BsmtQual__TA -2566.8404 3499.546 -0.733 0.463 -9436.375 4302.694
54 BsmtUnfSF -6.3844 4.070 -1.569 0.117 -14.373 1.605
55 CentralAir__N -846.8181 3020.771 -0.280 0.779 -6776.527 5082.891
56 CentralAir__Y 846.8767 3020.765 0.280 0.779 -5082.821 6776.574
57 Condition1__Artery 3732.8700 6192.364 0.603 0.547 -8422.611 15900.000
58 Condition1__Feedr -496.4521 5065.223 -0.098 0.922 -10400.000 9446.474
59 Condition1__Norm 10920.0000 3472.399 3.145 0.002 4105.120 17700.000 **
60 Condition1__PosN -563.8500 7897.333 -0.071 0.943 -16100.000 14900.000
61 Condition1__RRAe -18820.0000 13200.000 -1.431 0.153 -44600.000 7000.931
62 Condition1__RRAn 7528.7156 7083.730 1.063 0.288 -6376.497 21400.000
63 Condition1__Rare cases -2303.4221 9221.158 -0.250 0.803 -20400.000 15800.000
64 Electrical__FuseA 3211.1933 3976.491 0.808 0.420 -4594.575 11000.000
65 Electrical__FuseF -1947.4463 5474.071 -0.356 0.722 -12700.000 8798.041
66 Electrical__SBrkr -1263.6885 3296.593 -0.383 0.702 -7734.832 5207.455
67 EnclosedPorch 8.3079 19.643 0.423 0.672 -30.251 46.866
68 ExterCond__Fa 4465.3177 5747.467 0.777 0.437 -6816.840 15700.000
69 ExterCond__Gd -6896.6053 3721.336 -1.853 0.064 -14200.000 408.299 .
70 ExterCond__TA 2431.3460 3148.612 0.772 0.440 -3749.313 8612.005
71 ExterQual__Ex -312.4044 7410.732 -0.042 0.966 -14900.000 14200.000
72 ExterQual__Fa 8784.3626 13000.000 0.678 0.498 -16600.000 34200.000
73 ExterQual__Gd -1930.8438 5030.099 -0.384 0.701 -11800.000 7943.134
74 ExterQual__TA -6541.0559 4949.599 -1.322 0.187 -16300.000 3174.903
75 Exterior1st__AsbShng -13900.0000 16100.000 -0.861 0.389 -45600.000 17800.000
76 Exterior1st__BrkFace 11970.0000 8076.529 1.482 0.139 -3881.228 27800.000
77 Exterior1st__CemntBd 34380.0000 22800.000 1.510 0.131 -10300.000 79100.000
78 Exterior1st__HdBoard -4457.8950 7229.429 -0.617 0.538 -18600.000 9733.323
79 Exterior1st__MetalSd -6254.5170 15600.000 -0.400 0.689 -37000.000 24400.000
80 Exterior1st__Plywood -336.5469 7381.400 -0.046 0.964 -14800.000 14200.000
81 Exterior1st__Stucco -846.7644 14200.000 -0.059 0.953 -28800.000 27100.000
82 Exterior1st__VinylSd -15190.0000 8383.864 -1.812 0.070 -31600.000 1269.085 .
83 Exterior1st__Wd Sdng -2063.0215 6229.668 -0.331 0.741 -14300.000 10200.000
84 Exterior1st__WdShing -3306.7264 8627.406 -0.383 0.702 -20200.000 13600.000
85 Exterior2nd__AsbShng 9117.7905 15700.000 0.582 0.561 -21700.000 39900.000
86 Exterior2nd__BrkFace 811.8004 10900.000 0.074 0.941 -20600.000 22300.000
87 Exterior2nd__CmentBd -51790.0000 23700.000 -2.186 0.029 -98300.000 -5274.610 *
88 Exterior2nd__HdBoard 2726.4321 7007.409 0.389 0.697 -11000.000 16500.000
89 Exterior2nd__ImStucc 26090.0000 11800.000 2.220 0.027 3022.361 49200.000 *
90 Exterior2nd__MetalSd 9677.6213 15800.000 0.611 0.542 -21400.000 40800.000
91 Exterior2nd__Plywood 3312.2103 6478.681 0.511 0.609 -9405.305 16000.000
92 Exterior2nd__Rare cases 2604.9128 9905.871 0.263 0.793 -16800.000 22000.000
93 Exterior2nd__Stucco -23580.0000 14800.000 -1.592 0.112 -52700.000 5493.237
94 Exterior2nd__VinylSd 14500.0000 8237.632 1.760 0.079 -1674.449 30700.000 .
95 Exterior2nd__Wd Sdng 4142.5850 6151.754 0.673 0.501 -7933.179 16200.000
96 Exterior2nd__Wd Shng 2394.6791 7476.449 0.320 0.749 -12300.000 17100.000
97 Fence__GdPrv 2352.7718 4990.257 0.471 0.637 -7442.998 12100.000
98 Fence__GdWo 1200.5834 5164.365 0.232 0.816 -8936.957 11300.000
99 Fence__MnPrv 3279.3900 4081.948 0.803 0.422 -4733.389 11300.000
100 Fence__MnWw -4941.3647 10900.000 -0.451 0.652 -26400.000 16500.000
101 Fence__No Fence -1891.3219 3372.180 -0.561 0.575 -8510.841 4728.197
102 FireplaceQu__Ex 11880.0000 6631.453 1.792 0.074 -1135.124 24900.000 .
103 FireplaceQu__Fa -4539.2003 5748.670 -0.790 0.430 -15800.000 6745.319
104 FireplaceQu__Gd -2270.0968 3015.670 -0.753 0.452 -8189.793 3649.600
105 FireplaceQu__No Fireplace -1538.4713 770.427 -1.997 0.046 -3050.803 -26.139 *
106 FireplaceQu__Po -634.2478 6632.834 -0.096 0.924 -13700.000 12400.000
107 FireplaceQu__TA -2900.2054 3268.343 -0.887 0.375 -9315.893 3515.483
108 Fireplaces -17470.0000 5177.698 -3.374 0.001 -27600.000 -7303.447 **
109 Fireplaces_str__"0" -25760.0000 5323.387 -4.839 0.000 -36200.000 -15300.000 ***
110 Fireplaces_str__"1" -3675.9313 1685.955 -2.180 0.030 -6985.425 -366.437 *
111 Fireplaces_str__"2" 29430.0000 5724.041 5.142 0.000 18200.000 40700.000 ***
112 Foundation__BrkTil -2600.6615 5740.920 -0.453 0.651 -13900.000 8668.643
113 Foundation__CBlock 2769.6422 4486.126 0.617 0.537 -6036.530 11600.000
114 Foundation__PConc 4089.2367 4717.156 0.867 0.386 -5170.441 13300.000
115 Foundation__Rare cases 1568.3519 10500.000 0.150 0.881 -19000.000 22100.000
116 Foundation__Slab -5826.5108 13300.000 -0.437 0.662 -32000.000 20300.000
117 FullBath 6058.7534 6120.779 0.990 0.323 -5956.207 18100.000
118 FullBath_str__"1" -16390.0000 8447.365 -1.941 0.053 -33000.000 188.950 .
119 FullBath_str__"2" -14450.0000 4225.459 -3.420 0.001 -22700.000 -6158.111 **
120 FullBath_str__"3" 17120.0000 4009.263 4.270 0.000 9248.902 25000.000 ***
121 FullBath_str__Rare cases 13730.0000 8408.071 1.633 0.103 -2778.160 30200.000
122 Functional__Maj1 -16200.0000 10500.000 -1.544 0.123 -36800.000 4393.314
123 Functional__Min1 -2100.2088 6308.488 -0.333 0.739 -14500.000 10300.000
124 Functional__Min2 8853.0233 6764.342 1.309 0.191 -4425.237 22100.000
125 Functional__Mod -2294.5000 7879.046 -0.291 0.771 -17800.000 13200.000
126 Functional__Typ 11740.0000 4315.963 2.720 0.007 3266.759 20200.000 **
127 GarageArea -0.4762 12.020 -0.040 0.968 -24.071 23.119
128 GarageCars 17210.0000 7976.569 2.157 0.031 1547.973 32900.000 *
129 GarageCars_str__"0.0" 2601.8175 2343.661 1.110 0.267 -1998.740 7202.375
130 GarageCars_str__"1.0" 5055.3689 7573.893 0.667 0.505 -9812.025 19900.000
131 GarageCars_str__"2.0" -6389.0263 1983.306 -3.221 0.001 -10300.000 -2495.837 **
132 GarageCars_str__"3.0" -1268.1016 8968.512 -0.141 0.888 -18900.000 16300.000
133 GarageCond__Fa -3618.5777 6222.544 -0.582 0.561 -15800.000 8596.146
134 GarageCond__No Garage 2601.8175 2343.661 1.110 0.267 -1998.740 7202.375
135 GarageCond__Rare cases -3414.7673 6891.991 -0.495 0.620 -16900.000 10100.000
136 GarageCond__TA 4431.5860 4744.790 0.934 0.351 -4882.336 13700.000
137 GarageFinish__Fin 939.9693 2090.693 0.450 0.653 -3164.017 5043.955
138 GarageFinish__No Garage 2601.8175 2343.661 1.110 0.267 -1998.740 7202.375
139 GarageFinish__RFn -643.9639 1808.495 -0.356 0.722 -4194.001 2906.073
140 GarageFinish__Unf -2897.7644 2154.438 -1.345 0.179 -7126.880 1331.351
141 GarageQual__Fa -6870.6135 6183.708 -1.111 0.267 -19000.000 5267.876
142 GarageQual__Gd 4780.3569 7666.879 0.624 0.533 -10300.000 19800.000
143 GarageQual__No Garage 2601.8175 2343.661 1.110 0.267 -1998.740 7202.375
144 GarageQual__TA -511.5024 4677.222 -0.109 0.913 -9692.792 8669.787
145 GarageType__Attchd 6575.8057 3232.901 2.034 0.042 229.689 12900.000 *
146 GarageType__Basment 3422.5864 8049.200 0.425 0.671 -12400.000 19200.000
147 GarageType__BuiltIn -5692.1896 4878.686 -1.167 0.244 -15300.000 3884.569
148 GarageType__Detchd 5929.9652 3716.066 1.596 0.111 -1364.594 13200.000
149 GarageType__No Garage 2601.8175 2343.661 1.110 0.267 -1998.740 7202.375
150 GarageType__Rare cases -12840.0000 8674.122 -1.480 0.139 -29900.000 4189.194
151 GarageYrBlt__1920.0 8890.2752 11100.000 0.800 0.424 -12900.000 30700.000
152 GarageYrBlt__1925.0 -5147.7715 12700.000 -0.405 0.686 -30100.000 19800.000
153 GarageYrBlt__1940.0 147.7335 10200.000 0.014 0.988 -19900.000 20200.000
154 GarageYrBlt__1941.0 -13030.0000 11700.000 -1.109 0.268 -36100.000 10000.000
155 GarageYrBlt__1948.0 6120.5497 10300.000 0.592 0.554 -14200.000 26400.000
156 GarageYrBlt__1950.0 9453.3317 8616.078 1.097 0.273 -7459.848 26400.000
157 GarageYrBlt__1953.0 -8170.0970 11200.000 -0.729 0.466 -30200.000 13800.000
158 GarageYrBlt__1954.0 5616.8723 8383.653 0.670 0.503 -10800.000 22100.000
159 GarageYrBlt__1955.0 5002.4776 11200.000 0.448 0.654 -16900.000 26900.000
160 GarageYrBlt__1956.0 502.3539 8533.073 0.059 0.953 -16200.000 17300.000
161 GarageYrBlt__1957.0 -12370.0000 8778.900 -1.410 0.159 -29600.000 4858.379
162 GarageYrBlt__1958.0 -4305.6688 8491.188 -0.507 0.612 -21000.000 12400.000
163 GarageYrBlt__1959.0 -3143.2312 11300.000 -0.277 0.781 -25400.000 19100.000
164 GarageYrBlt__1960.0 2156.7368 9286.725 0.232 0.816 -16100.000 20400.000
165 GarageYrBlt__1961.0 821.4058 9523.383 0.086 0.931 -17900.000 19500.000
166 GarageYrBlt__1962.0 2352.5008 8579.673 0.274 0.784 -14500.000 19200.000
167 GarageYrBlt__1963.0 -5176.7958 11900.000 -0.435 0.664 -28500.000 18200.000
168 GarageYrBlt__1964.0 4404.6787 8352.097 0.527 0.598 -12000.000 20800.000
169 GarageYrBlt__1965.0 4515.2740 7759.576 0.582 0.561 -10700.000 19700.000
170 GarageYrBlt__1966.0 7713.2053 8452.440 0.913 0.362 -8878.758 24300.000
171 GarageYrBlt__1967.0 -2796.3429 9749.350 -0.287 0.774 -21900.000 16300.000
172 GarageYrBlt__1968.0 2161.7574 7329.970 0.295 0.768 -12200.000 16600.000
173 GarageYrBlt__1969.0 -4376.0380 9844.444 -0.445 0.657 -23700.000 14900.000
174 GarageYrBlt__1970.0 -6066.7877 8479.347 -0.715 0.475 -22700.000 10600.000
175 GarageYrBlt__1971.0 -8834.2134 10400.000 -0.848 0.397 -29300.000 11600.000
176 GarageYrBlt__1972.0 5459.8952 12600.000 0.433 0.665 -19300.000 30200.000
177 GarageYrBlt__1973.0 -5189.7205 10900.000 -0.474 0.636 -26700.000 16300.000
178 GarageYrBlt__1974.0 3708.8970 8733.497 0.425 0.671 -13400.000 20900.000
179 GarageYrBlt__1976.0 -547.9292 7466.268 -0.073 0.942 -15200.000 14100.000
180 GarageYrBlt__1977.0 -9148.6240 6577.647 -1.391 0.165 -22100.000 3763.159
181 GarageYrBlt__1978.0 -4003.6845 8986.478 -0.446 0.656 -21600.000 13600.000
182 GarageYrBlt__1979.0 8813.8773 9689.640 0.910 0.363 -10200.000 27800.000
183 GarageYrBlt__1980.0 -3602.2659 9586.221 -0.376 0.707 -22400.000 15200.000
184 GarageYrBlt__1981.0 -3467.5224 12400.000 -0.279 0.780 -27900.000 20900.000
185 GarageYrBlt__1985.0 -16340.0000 11100.000 -1.476 0.140 -38100.000 5390.701
186 GarageYrBlt__1987.0 7774.5942 10900.000 0.715 0.475 -13600.000 29100.000
187 GarageYrBlt__1988.0 -2355.4834 8842.483 -0.266 0.790 -19700.000 15000.000
188 GarageYrBlt__1989.0 -3939.7325 11600.000 -0.339 0.735 -26700.000 18900.000
189 GarageYrBlt__1990.0 -2281.7813 9148.622 -0.249 0.803 -20200.000 15700.000
190 GarageYrBlt__1992.0 -15540.0000 9630.900 -1.613 0.107 -34400.000 3368.385
191 GarageYrBlt__1993.0 -1556.7949 7988.941 -0.195 0.846 -17200.000 14100.000
192 GarageYrBlt__1994.0 6794.4503 8460.653 0.803 0.422 -9813.633 23400.000
193 GarageYrBlt__1995.0 13130.0000 7779.559 1.688 0.092 -2141.221 28400.000 .
194 GarageYrBlt__1996.0 14630.0000 8853.710 1.652 0.099 -2751.196 32000.000 .
195 GarageYrBlt__1997.0 4909.9572 7633.986 0.643 0.520 -10100.000 19900.000
196 GarageYrBlt__1998.0 1565.3608 7327.271 0.214 0.831 -12800.000 15900.000
197 GarageYrBlt__1999.0 -6415.8034 7600.197 -0.844 0.399 -21300.000 8503.225
198 GarageYrBlt__2000.0 3484.3090 7562.350 0.461 0.645 -11400.000 18300.000
199 GarageYrBlt__2001.0 6898.9150 8026.015 0.860 0.390 -8855.984 22700.000
200 GarageYrBlt__2002.0 -1586.3749 7392.060 -0.215 0.830 -16100.000 12900.000
201 GarageYrBlt__2003.0 -4813.0710 6569.778 -0.733 0.464 -17700.000 8083.265
202 GarageYrBlt__2004.0 -1465.3036 6752.263 -0.217 0.828 -14700.000 11800.000
203 GarageYrBlt__2005.0 -6917.1792 6128.087 -1.129 0.259 -18900.000 5112.126
204 GarageYrBlt__2006.0 2816.1685 7154.678 0.394 0.694 -11200.000 16900.000
205 GarageYrBlt__2007.0 -15590.0000 7336.068 -2.125 0.034 -30000.000 -1187.476 *
206 GarageYrBlt__2008.0 8908.1849 9669.565 0.921 0.357 -10100.000 27900.000
207 GarageYrBlt__2009.0 32890.0000 9997.430 3.290 0.001 13300.000 52500.000 **
208 GarageYrBlt__No Garage 2601.8175 2343.661 1.110 0.267 -1998.740 7202.375
209 GarageYrBlt__Rare cases -6057.8308 3967.706 -1.527 0.127 -13800.000 1730.693
210 GrLivArea 58.5267 31.474 1.860 0.063 -3.257 120.310 .
211 HalfBath 2858.3009 5280.756 0.541 0.588 -7507.713 13200.000
212 HalfBath_str__"0" -1193.7418 1131.388 -1.055 0.292 -3414.632 1027.149
213 HalfBath_str__"1" -470.7004 5361.759 -0.088 0.930 -11000.000 10100.000
214 HalfBath_str__"2" 1664.5006 5199.734 0.320 0.749 -8542.469 11900.000
215 HeatingQC__Ex 2249.8611 2424.900 0.928 0.354 -2510.167 7009.889
216 HeatingQC__Fa 1641.9802 4488.338 0.366 0.715 -7168.534 10500.000
217 HeatingQC__Gd -2790.8347 2501.144 -1.116 0.265 -7700.528 2118.858
218 HeatingQC__TA -1100.9481 2269.879 -0.485 0.628 -5556.673 3354.777
219 HouseStyle__1.5Fin -243.1712 11800.000 -0.021 0.984 -23400.000 22900.000
220 HouseStyle__1.5Unf 20120.0000 22200.000 0.907 0.365 -23400.000 63700.000
221 HouseStyle__1Story 24520.0000 10400.000 2.368 0.018 4191.716 44800.000 *
222 HouseStyle__2.5Unf -29770.0000 21800.000 -1.368 0.172 -72500.000 12900.000
223 HouseStyle__2Story -8629.9625 10400.000 -0.833 0.405 -29000.000 11700.000
224 HouseStyle__Rare cases -40760.0000 24700.000 -1.650 0.099 -89200.000 7725.341 .
225 HouseStyle__SFoyer 25490.0000 17000.000 1.502 0.133 -7816.221 58800.000
226 HouseStyle__SLvl 9273.4006 16200.000 0.573 0.567 -22500.000 41000.000
227 KitchenAbvGr -3074.7682 12200.000 -0.253 0.800 -26900.000 20800.000
228 KitchenAbvGr_str__"1" -3091.8578 7442.467 -0.415 0.678 -17700.000 11500.000
229 KitchenAbvGr_str__"2" 3091.9163 7442.191 0.415 0.678 -11500.000 17700.000
230 KitchenQual__Ex 12250.0000 4598.609 2.664 0.008 3222.825 21300.000 **
231 KitchenQual__Fa 6815.9285 5923.506 1.151 0.250 -4811.789 18400.000
232 KitchenQual__Gd -7953.1610 2864.050 -2.777 0.006 -13600.000 -2331.091 **
233 KitchenQual__TA -11110.0000 2783.603 -3.992 0.000 -16600.000 -5648.353 ***
234 LandContour__Bnk -19710.0000 4663.028 -4.226 0.000 -28900.000 -10600.000 ***
235 LandContour__HLS 8699.8297 5018.809 1.733 0.083 -1151.987 18600.000 .
236 LandContour__Low 7869.9105 6514.508 1.208 0.227 -4917.932 20700.000
237 LandContour__Lvl 3137.3933 3509.407 0.894 0.372 -3751.498 10000.000
238 LandSlope__Gtl 4614.4167 5781.516 0.798 0.425 -6734.578 16000.000
239 LandSlope__Mod 5538.7763 5639.141 0.982 0.326 -5530.740 16600.000
240 LandSlope__Sev -10150.0000 9607.560 -1.057 0.291 -29000.000 8706.304
241 LotArea 0.6113 0.153 3.985 0.000 0.310 0.912 ***
242 LotConfig__Corner -757.2013 2534.947 -0.299 0.765 -5733.249 4218.846
243 LotConfig__CulDSac 6540.6631 3437.058 1.903 0.057 -206.209 13300.000 .
244 LotConfig__FR2 -4392.0185 4487.580 -0.979 0.328 -13200.000 4417.007
245 LotConfig__Inside -1391.3848 2088.164 -0.666 0.505 -5490.407 2707.638
246 LotFrontage -197.8470 62.644 -3.158 0.002 -320.815 -74.879 **
247 LotShape__IR1 1053.4970 3729.306 0.282 0.778 -6267.053 8374.047
248 LotShape__IR2 4438.4363 5170.172 0.858 0.391 -5710.502 14600.000
249 LotShape__IR3 -9003.9892 9387.265 -0.959 0.338 -27400.000 9423.015
250 LotShape__Reg 3512.1145 3816.623 0.920 0.358 -3979.837 11000.000
251 MSSubClass -226.1444 877.357 -0.258 0.797 -1948.378 1496.089
252 MSSubClass_str__"120" -15290.0000 34500.000 -0.443 0.658 -83000.000 52400.000
253 MSSubClass_str__"160" 6137.3503 69300.000 0.089 0.929 -130000.000 142000.000
254 MSSubClass_str__"180" 5304.4473 87600.000 0.061 0.952 -167000.000 177000.000
255 MSSubClass_str__"190" 9404.5974 40500.000 0.232 0.816 -70000.000 88900.000
256 MSSubClass_str__"20" -13500.0000 57400.000 -0.235 0.814 -126000.000 99200.000
257 MSSubClass_str__"30" -15310.0000 50400.000 -0.304 0.761 -114000.000 83600.000
258 MSSubClass_str__"45" 3510.6140 43200.000 0.081 0.935 -81300.000 88300.000
259 MSSubClass_str__"50" 1147.4145 32500.000 0.035 0.972 -62700.000 65000.000
260 MSSubClass_str__"60" -2337.5392 23500.000 -0.100 0.921 -48400.000 43700.000
261 MSSubClass_str__"70" 12940.0000 17500.000 0.741 0.459 -21300.000 47200.000
262 MSSubClass_str__"75" 27280.0000 27600.000 0.987 0.324 -27000.000 81500.000
263 MSSubClass_str__"80" 7411.0776 16500.000 0.450 0.653 -24900.000 39700.000
264 MSSubClass_str__"85" -17700.0000 17600.000 -1.005 0.315 -52300.000 16900.000
265 MSSubClass_str__"90" -9000.5928 7315.770 -1.230 0.219 -23400.000 5360.111
266 MSZoning__C (all) -24950.0000 11400.000 -2.188 0.029 -47300.000 -2566.795 *
267 MSZoning__FV 4174.3482 8360.192 0.499 0.618 -12200.000 20600.000
268 MSZoning__RH 814.8452 8842.482 0.092 0.927 -16500.000 18200.000
269 MSZoning__RL 10410.0000 4674.320 2.227 0.026 1231.802 19600.000 *
270 MSZoning__RM 9549.7076 5628.541 1.697 0.090 -1499.001 20600.000 .
271 MasVnrArea 5.1575 8.543 0.604 0.546 -11.613 21.928
272 MasVnrType__BrkCmn -2497.4653 7598.832 -0.329 0.742 -17400.000 12400.000
273 MasVnrType__BrkFace 1201.8715 3148.718 0.382 0.703 -4978.996 7382.739
274 MasVnrType__None 3443.5586 3315.077 1.039 0.299 -3063.869 9950.986
275 MasVnrType__Stone -2147.9062 4027.552 -0.533 0.594 -10100.000 5758.093
276 MiscFeature__None -19310.0000 15100.000 -1.278 0.201 -49000.000 10300.000
277 MiscFeature__Shed 19310.0000 15100.000 1.278 0.201 -10300.000 49000.000
278 MiscVal 3.7499 3.093 1.212 0.226 -2.321 9.821
279 MiscVal_str__"0" 30290.0000 21000.000 1.439 0.151 -11000.000 71600.000
280 MiscVal_str__"400" -14200.0000 12400.000 -1.145 0.253 -38600.000 10100.000
281 MiscVal_str__Rare cases -16080.0000 11900.000 -1.355 0.176 -39400.000 7218.501
282 MoSold -370.3038 382.192 -0.969 0.333 -1120.539 379.931
283 MoSold_str__"1" -140.0924 3870.086 -0.036 0.971 -7736.990 7456.805
284 MoSold_str__"10" -6137.7369 3622.897 -1.694 0.091 -13200.000 973.935 .
285 MoSold_str__"11" 975.0645 3561.219 0.274 0.784 -6015.534 7965.663
286 MoSold_str__"12" 1233.6126 3682.759 0.335 0.738 -5995.567 8462.792
287 MoSold_str__"2" -2404.6106 4566.538 -0.527 0.599 -11400.000 6559.409
288 MoSold_str__"3" 1148.4276 3400.245 0.338 0.736 -5526.183 7823.038
289 MoSold_str__"4" 1333.9458 3079.686 0.433 0.665 -4711.414 7379.306
290 MoSold_str__"5" 466.7314 2696.955 0.173 0.863 -4827.334 5760.797
291 MoSold_str__"6" -647.4570 2410.146 -0.269 0.788 -5378.523 4083.609
292 MoSold_str__"7" 2434.4470 2469.463 0.986 0.325 -2413.057 7281.951
293 MoSold_str__"8" -516.6694 3301.023 -0.157 0.876 -6996.508 5963.169
294 MoSold_str__"9" 2254.3959 4001.965 0.563 0.573 -5601.377 10100.000
295 Neighborhood__Blmngtn -3826.8162 11100.000 -0.344 0.731 -25700.000 18000.000
296 Neighborhood__BrDale -2358.5819 13400.000 -0.176 0.860 -28600.000 23900.000
297 Neighborhood__BrkSide -2615.4167 7465.791 -0.350 0.726 -17300.000 12000.000
298 Neighborhood__ClearCr -4279.7648 8244.046 -0.519 0.604 -20500.000 11900.000
299 Neighborhood__CollgCr -1554.8091 4397.474 -0.354 0.724 -10200.000 7077.340
300 Neighborhood__Crawfor 12860.0000 7198.784 1.787 0.074 -1268.979 27000.000 .
301 Neighborhood__Edwards -18550.0000 4770.574 -3.889 0.000 -27900.000 -9186.292 ***
302 Neighborhood__Gilbert -3120.4680 5725.753 -0.545 0.586 -14400.000 8119.065
303 Neighborhood__IDOTRR -9857.8751 10200.000 -0.968 0.333 -29800.000 10100.000
304 Neighborhood__MeadowV 8832.6782 14100.000 0.626 0.532 -18900.000 36500.000
305 Neighborhood__Mitchel -12520.0000 5925.014 -2.113 0.035 -24100.000 -886.133 *
306 Neighborhood__NAmes -10790.0000 4126.423 -2.614 0.009 -18900.000 -2685.108 **
307 Neighborhood__NWAmes -8388.4586 5415.841 -1.549 0.122 -19000.000 2242.724
308 Neighborhood__NoRidge 27530.0000 7374.038 3.733 0.000 13100.000 42000.000 ***
309 Neighborhood__NridgHt 27620.0000 6127.616 4.507 0.000 15600.000 39600.000 ***
310 Neighborhood__OldTown -12880.0000 7780.393 -1.656 0.098 -28200.000 2389.453 .
311 Neighborhood__Rare cases -4479.3430 13900.000 -0.323 0.747 -31700.000 22800.000
312 Neighborhood__SWISU -11060.0000 9985.532 -1.108 0.268 -30700.000 8540.764
313 Neighborhood__Sawyer -8603.3173 5402.972 -1.592 0.112 -19200.000 2002.604
314 Neighborhood__SawyerW -1448.2180 5449.716 -0.266 0.791 -12100.000 9249.461
315 Neighborhood__Somerst 16880.0000 8033.437 2.101 0.036 1105.697 32600.000 *
316 Neighborhood__StoneBr 39090.0000 7806.995 5.006 0.000 23800.000 54400.000 ***
317 Neighborhood__Timber -12070.0000 6419.267 -1.880 0.061 -24700.000 535.206 .
318 Neighborhood__Veenker -4404.9191 11400.000 -0.385 0.700 -26900.000 18100.000
319 OpenPorchSF -3.2971 16.424 -0.201 0.841 -35.536 28.942
320 OverallCond 7712.1195 5687.559 1.356 0.175 -3452.440 18900.000
321 OverallCond_str__"3" 7418.5480 17400.000 0.426 0.671 -26800.000 41600.000
322 OverallCond_str__"4" -3601.7436 12500.000 -0.289 0.772 -28000.000 20800.000
323 OverallCond_str__"5" 2330.1291 6882.565 0.339 0.735 -11200.000 15800.000
324 OverallCond_str__"6" 3693.6861 2984.103 1.238 0.216 -2164.045 9551.417
325 OverallCond_str__"7" 2550.6378 6351.050 0.402 0.688 -9916.340 15000.000
326 OverallCond_str__"8" -6046.9613 12100.000 -0.500 0.617 -29800.000 17700.000
327 OverallCond_str__"9" -6344.2378 18700.000 -0.340 0.734 -43000.000 30300.000
328 OverallQual 6459.9570 6973.470 0.926 0.355 -7228.818 20100.000
329 OverallQual_str__"10" 29980.0000 26100.000 1.150 0.250 -21200.000 81100.000
330 OverallQual_str__"3" 9933.3339 25200.000 0.394 0.693 -39500.000 59400.000
331 OverallQual_str__"4" -8478.0481 17600.000 -0.480 0.631 -43100.000 26200.000
332 OverallQual_str__"5" -14130.0000 11400.000 -1.234 0.218 -36600.000 8345.062
333 OverallQual_str__"6" -15470.0000 5186.080 -2.983 0.003 -25600.000 -5289.570 **
334 OverallQual_str__"7" -15860.0000 4848.309 -3.271 0.001 -25400.000 -6339.594 **
335 OverallQual_str__"8" -5974.5707 10800.000 -0.552 0.581 -27200.000 15300.000
336 OverallQual_str__"9" 20000.0000 18300.000 1.090 0.276 -16000.000 56000.000
337 PavedDrive__N 4091.2429 3899.456 1.049 0.294 -3563.307 11700.000
338 PavedDrive__P -5695.4005 4953.456 -1.150 0.251 -15400.000 4028.130
339 PavedDrive__Y 1604.2162 3219.673 0.498 0.618 -4715.935 7924.367
340 RoofStyle__Flat -8829.5348 10600.000 -0.830 0.407 -29700.000 12100.000
341 RoofStyle__Gable -2518.5274 4737.129 -0.532 0.595 -11800.000 6780.358
342 RoofStyle__Gambrel -2735.9434 12300.000 -0.222 0.824 -26900.000 21500.000
343 RoofStyle__Hip -934.7188 5101.934 -0.183 0.855 -10900.000 9080.271
344 RoofStyle__Rare cases 15020.0000 11000.000 1.368 0.172 -6531.359 36600.000
345 SaleCondition__Abnorml 97.8715 5951.463 0.016 0.987 -11600.000 11800.000
346 SaleCondition__Alloca 9125.4332 11400.000 0.804 0.422 -13200.000 31400.000
347 SaleCondition__Family -1113.4516 7984.812 -0.139 0.889 -16800.000 14600.000
348 SaleCondition__Normal 6107.3744 5180.921 1.179 0.239 -4062.666 16300.000
349 SaleCondition__Partial -14220.0000 15200.000 -0.938 0.349 -44000.000 15500.000
350 SaleType__COD -2932.0952 7287.739 -0.402 0.688 -17200.000 11400.000
351 SaleType__New 22050.0000 14400.000 1.534 0.125 -6163.613 50300.000
352 SaleType__Rare cases -10430.0000 6984.289 -1.494 0.136 -24100.000 3278.935
353 SaleType__WD -8682.6872 5621.273 -1.545 0.123 -19700.000 2351.754
354 ScreenPorch 64.2126 17.739 3.620 0.000 29.392 99.033 ***
355 TotRmsAbvGrd 6627.1664 5714.690 1.160 0.247 -4590.649 17800.000
356 TotRmsAbvGrd_str__"10" 19240.0000 14700.000 1.306 0.192 -9688.291 48200.000
357 TotRmsAbvGrd_str__"11" -29400.0000 21400.000 -1.376 0.169 -71300.000 12500.000
358 TotRmsAbvGrd_str__"12" -28120.0000 26000.000 -1.084 0.279 -79100.000 22800.000
359 TotRmsAbvGrd_str__"3" 11610.0000 26000.000 0.446 0.655 -39400.000 62700.000
360 TotRmsAbvGrd_str__"4" 7875.1932 19700.000 0.400 0.689 -30800.000 46500.000
361 TotRmsAbvGrd_str__"5" 7030.5917 14200.000 0.497 0.620 -20800.000 34800.000
362 TotRmsAbvGrd_str__"6" 6457.3213 8796.174 0.734 0.463 -10800.000 23700.000
363 TotRmsAbvGrd_str__"7" 4572.4521 4141.743 1.104 0.270 -3557.703 12700.000
364 TotRmsAbvGrd_str__"8" 656.6903 4550.841 0.144 0.885 -8276.515 9589.895
365 TotRmsAbvGrd_str__"9" 80.7595 9467.051 0.009 0.993 -18500.000 18700.000
366 TotalBsmtSF 1.0617 5.659 0.188 0.851 -10.047 12.170
367 WoodDeckSF 8.3264 8.614 0.967 0.334 -8.583 25.236
368 YearBuilt 217.2946 131.382 1.654 0.099 -40.605 475.194 .
369 YearRemodAdd 95.3702 88.610 1.076 0.282 -78.569 269.310
370 YrSold -324.0238 160.548 -2.018 0.044 -639.176 -8.871 *
371 YrSold_str__"2006" 668.6123 1982.587 0.337 0.736 -3223.164 4560.388
372 YrSold_str__"2007" -455.4234 1931.419 -0.236 0.814 -4246.759 3335.912
373 YrSold_str__"2008" -226.2129 1942.607 -0.116 0.907 -4039.510 3587.084
374 YrSold_str__"2009" -414.1245 1884.067 -0.220 0.826 -4112.508 3284.259
375 YrSold_str__"2010" 427.2069 2546.820 0.168 0.867 -4572.147 5426.561
-------------------------
--- Model statistic ---
R-squared : 0.921
Adj. R-squared : 0.89
F-statistic : 30
Prob (F-statistic): 8.685953892976382e-302
No. Observations : 1095
AIC : 25722
Df Residuals : 788
BIC : 27257
RMSE (test) : 29324
-------------------------
Maximum correlation between Reseduals and any data columns is 1.397018510768434e-12, with columns <LotArea>
Mean of train reseduals: 4.819381692910303e-08
------------------------------------- Random Forest -------------------------------------
-------------------------
RF model peramters:
{'bootstrap': True,
'ccp_alpha': 0.0,
'criterion': 'mse',
'max_depth': None,
'max_features': 'auto',
'max_leaf_nodes': None,
'max_samples': None,
'min_impurity_decrease': 0.0,
'min_impurity_split': None,
'min_samples_leaf': 1,
'min_samples_split': 2,
'min_weight_fraction_leaf': 0.0,
'n_estimators': 200,
'n_jobs': None,
'oob_score': True,
'random_state': None,
'verbose': 0,
'warm_start': False}
-------------------------
------------------------- --- Model statistic --- R^2 (test) : 0.8856059270305071 R^2 (train): 0.9792957005523424 RMSE (test): 23653 oob score : 0.8495393135794702 ------------------------- Maximum correlation between Reseduals and any data columns is 0.323711981173656, with columns <Neighborhood__Crawfor>